import time
from user.models import Position, Keyword
import requests


def analysis(result):
    """Extract the relevant fields from Lagou position JSON records.

    Args:
        result: iterable of position dicts, as found in the ``result``
            array of Lagou's positionAjax response.

    Returns:
        A list of 8-tuples in the order: (city, education, salary,
        work experience, skill/company labels joined by commas,
        benefits blurb, company short name, industry field).
    """
    # Build the row tuples directly instead of filling eight parallel
    # lists and zipping them — same output, far less bookkeeping.
    return [
        (
            data['city'],
            data['education'],
            data['salary'],
            data['workYear'],
            # job-skill / company tags arrive as a list; flatten to CSV
            ','.join(data['companyLabelList']),
            # company benefits description
            data['positionAdvantage'],
            data['companyShortName'],
            data['industryField'],
        )
        for data in result
    ]


def savePosition(position_list, keyword):
    """Persist the search keyword and the scraped positions.

    Args:
        position_list: list of 8-tuples as produced by ``analysis``.
        keyword: the search term these positions were scraped for.
    """
    # Record the search keyword itself.
    Keyword(word=keyword).save()

    # Map each scraped tuple onto a Position row.
    # NOTE: 'benifits' is the model's actual field name — keep as-is.
    records = [
        Position(
            location=loc, education=edu, salary=sal, experience=exp,
            requirements=req, benifits=ben, company=com, profession=pro,
            keyword=keyword,
        )
        for loc, edu, sal, exp, req, ben, com, pro in position_list
    ]
    # Single bulk INSERT instead of one query per row.
    Position.objects.bulk_create(records)


class LagouSpider:
    """Scraper for job postings from lagou.com's positionAjax endpoint.

    Keeps a single ``requests.Session`` so the cookies issued by the
    search page (see ``login``) are sent along with the Ajax POSTs.
    """

    def __init__(self):
        self.session = requests.Session()
        # JSON search endpoint; results are paginated via the 'pn' field.
        self.url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
        # The referer/origin headers are required or the endpoint rejects
        # the request as a bot.
        self.headers = {
            'referer': 'https://www.lagou.com/jobs/list_%E4%BC%9A%E8%AE%A1?labelWords=&fromSearch=true&suginput=',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML'
                          ', like Gecko) Chrome/84.0.4147.89 Safari/537.36',
            'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'origin': 'https://www.lagou.com',
        }

    def login(self):
        # Simulated login: hitting the search page makes the server set
        # the session cookies that the Ajax endpoint requires.
        self.session.get('https://www.lagou.com/jobs/list_python?px=new&city=%E5%85%A8%E5%9B%BD',
                             headers=self.headers, timeout=3)

    def spider(self, keyword):
        """Scrape every result page for *keyword* and persist the data.

        Pages are fetched until the server reports an empty page
        (``resultSize == 0``); the cookies are refreshed every 10 pages
        to avoid anti-scraping blocks.
        """
        self.login()
        # Current page number (1-based).
        page = 1
        data = {
            'first': 'true',
            'pn': page,
            'kd': keyword,
            'pageSize': 50
        }
        # timeout added for consistency with login(); without it a
        # stalled server would hang the scraper indefinitely.
        response = self.session.post(self.url, data=data,
                                     headers=self.headers, timeout=3)
        position_result = response.json()['content']['positionResult']
        result_size = position_result['resultSize']

        position_list = list()

        while result_size > 0:
            # Flatten the current page's JSON records into tuples.
            position_list.extend(analysis(position_result['result']))

            # Progress indicator, then request the next page.
            print(page)
            page += 1
            data['pn'] = page
            response = self.session.post(self.url, data=data,
                                         headers=self.headers, timeout=3)

            position_result = response.json()['content']['positionResult']
            result_size = position_result['resultSize']

            # Refresh cookies every 10 pages to dodge anti-scraping.
            if page % 10 == 0:
                self.session.cookies.clear()
                self.login()

        savePosition(position_list, keyword)